### Measuring Political Attitudes with Word Association - Co-occurrence Analysis ###
## Ze Han, Ph.D. Student, Princeton University Department of Politics, zeh@princeton.edu ##
## Naijia Liu, Assistant Professor, Harvard University Department of Government, naijialiu@fas.harvard.edu ##
## Rory Truex, Associate Professor, Princeton University Department of Politics and School of Public and International Affairs, rtruex@princeton.edu ##

# NOTE(review): hard-coded, machine-specific working directory. Every relative
# path used by this script ("./data/...", "./figures/...") is resolved against
# it, so the script only runs unchanged where this directory exists.
setwd("~/Desktop/WAT_PoQ/code")

# Load Packages
# Start from an empty workspace: removes every object, including dot-prefixed
# ones (`all` partially matches the `all.names` argument of ls()), from the
# environment the script is run in.
# NOTE(review): this also deletes anything the caller had in that environment.
rm(list=ls(all=TRUE))

# igraph is attached after tidyverse, so where both export the same name the
# igraph version is the one found first.
library(tidyverse)
library(igraph)

# Load Datasets
# Co-occurrence data. The code below reads the columns cue.eng, response.eng,
# freq, prop.alt, missing, source, order and link.02. Strings are kept as
# character vectors rather than factors.
ccp.coocnet <- read.csv("./data/ccp.coocnet.csv", stringsAsFactors = FALSE)

# First Order 01 Threshold ----
# Builds the first-order co-occurrence network and draws it to
# ./figures/fig-ccp-igraph-firstorder01.pdf.

# Seed terms: the responses found in the first 23 rows of the data.
# NOTE(review): the row range is hard-coded; presumably these are the
# first-order rows at this threshold -- confirm against the CSV.
firstorder.01.list <- ccp.coocnet$response.eng[1:23]

# This step used to build R source with paste() and run it through
# try(eval(parse())). A term containing an apostrophe produced source that did
# not parse, and try() swallowed the error, so such terms were never matched.
# They are skipped explicitly here, which keeps the selected rows the same; any
# other failure (e.g. an invalid regular expression) now stops the script
# instead of being hidden. NA terms are ignored.
# NOTE(review): drop the apostrophe filter if those terms should be matched;
# doing so may change the figure.
seed.terms <- firstorder.01.list[!is.na(firstorder.01.list)]
seed.terms <- seed.terms[!grepl("'", seed.terms, fixed = TRUE)]

# TRUE where response.eng matches any seed term. Terms are used as unanchored
# regular expressions, so a term also hits longer responses that contain it.
response.hit <- rep(FALSE, nrow(ccp.coocnet))
for (term in seed.terms) {
  response.hit <- response.hit | grepl(term, ccp.coocnet$response.eng)
}

# Flag every "first" row plus every matching row, but never a "third" row.
is.first <- ccp.coocnet$order %in% "first"
is.third <- ccp.coocnet$order %in% "third"
ccp.coocnet$type.firstorder.01 <-
  as.numeric((is.first | response.hit) & !is.third)

net <- ccp.coocnet[ccp.coocnet$type.firstorder.01 == 1, ]

# Drop self-loops (cue identical to response). Rows where the comparison is NA
# are kept.
net$match <- as.numeric((net$cue.eng == net$response.eng) %in% TRUE)
net <- net[net$match != 1, ]
ccp.coocnet.firstorder.01 <- net

# Vertex table: cue words in order of first appearance, followed by response
# words that never occur as a cue. unique(c()) is used rather than union()
# because igraph masks union().
node.labels <- unique(c(net$cue.eng, net$response.eng))
nodes <- data.frame(id = seq_along(node.labels), label = node.labels,
                    stringsAsFactors = FALSE)

# Edge table. Columns are taken by name so numeric columns stay numeric
# (cbind() on mixed vectors coerces everything to character).
edge.cols <- c("cue.eng", "response.eng", "freq", "prop.alt", "missing",
               "source")
el <- net[, edge.cols]

# Attach vertex ids. merge() moves its key column to the front and sorts the
# rows by it, so the id column is renamed by name, not by position. The two
# merges are kept as-is because they determine the edge order given to igraph.
el <- merge(el, nodes, by.x = "cue.eng", by.y = "label",
            all.x = TRUE, all.y = FALSE)
names(el)[names(el) == "id"] <- "from"
el <- merge(el, nodes, by.x = "response.eng", by.y = "label",
            all.x = TRUE, all.y = FALSE)
names(el)[names(el) == "id"] <- "to"

edgelist.firstorder.01 <- as_tibble(el)
edges <- select(edgelist.firstorder.01, from, to, prop.alt, source)
nodes <- as_tibble(nodes)

ccp.igraph <- graph_from_data_frame(d = edges, vertices = nodes,
                                    directed = FALSE)

# Vertex and edge appearance: edges whose source is "wat" are drawn solid and
# darker, all others dashed and lighter; straight, thin edges throughout.
is.wat <- E(ccp.igraph)$source == "wat"
V(ccp.igraph)$color <- "white"
E(ccp.igraph)$lty <- ifelse(is.wat, 1, 2)
E(ccp.igraph)$color <- ifelse(is.wat, "grey50", "grey65")
E(ccp.igraph)$curved <- 0
E(ccp.igraph)$width <- .5

# Define the frame and spacing for the plot
# NOTE(review): par() acts on the device that is current when it is called
# (opening one if none exists); pdf() below opens a new device with default
# settings, so these margins do not reach the PDF. Left in place because
# moving the call after pdf() would change the published figure.
par(mai = c(0, 0, 0, 0))

pdf('./figures/fig-ccp-igraph-firstorder01.pdf', width = 18, height = 18)
set.seed(08544)  # fixes the random start of the force-directed layout
plot(ccp.igraph,
     layout = layout_with_fr,           # Fruchterman-Reingold layout
     vertex.shape = NULL,
     vertex.label.family = "sans",
     # vertex.label.cex used to be passed twice (1.15 and 1.8). igraph looks
     # plot parameters up by name, which returns the first match, so 1.15 is
     # the size that was drawn; the dead duplicate is removed.
     # NOTE(review): if 1.8 was intended, change the value here.
     vertex.label.cex = 1.15,
     vertex.label.dist = 0.5,           # labels offset slightly from nodes
     vertex.frame.color = 'grey50',
     vertex.label.color = 'black',
     vertex.label.font = 1,
     vertex.size = 1,
     vertex.label = V(ccp.igraph)$label
)
dev.off()

# First Order 02 Threshold ----
# Builds the first-order co-occurrence network restricted by link.02 and draws
# it to ./figures/fig-ccp-igraph-firstorder02.pdf.

# Seed terms: responses of "first" rows that have link.02 == 1.
firstorder.02.list <- ccp.coocnet$response.eng[
  ccp.coocnet$order == "first" & ccp.coocnet$link.02 == 1
]

# This step used to build R source with paste() and run it through
# try(eval(parse())). A term containing an apostrophe produced source that did
# not parse, and try() swallowed the error, so such terms were never matched.
# They are skipped explicitly here, which keeps the selected rows the same; any
# other failure (e.g. an invalid regular expression) now stops the script
# instead of being hidden. NA terms are ignored.
# NOTE(review): drop the apostrophe filter if those terms should be matched;
# doing so may change the figure.
seed.terms <- firstorder.02.list[!is.na(firstorder.02.list)]
seed.terms <- seed.terms[!grepl("'", seed.terms, fixed = TRUE)]

# TRUE where response.eng matches any seed term. Terms are used as unanchored
# regular expressions, so a term also hits longer responses that contain it.
response.hit <- rep(FALSE, nrow(ccp.coocnet))
for (term in seed.terms) {
  response.hit <- response.hit | grepl(term, ccp.coocnet$response.eng)
}

# Flag every "first" row plus every matching row, then exclude "third" rows
# and rows with link.02 == 0.
is.first <- ccp.coocnet$order %in% "first"
is.third <- ccp.coocnet$order %in% "third"
no.link <- ccp.coocnet$link.02 %in% 0
ccp.coocnet$type.firstorder.02 <-
  as.numeric((is.first | response.hit) & !is.third & !no.link)

net <- ccp.coocnet[ccp.coocnet$type.firstorder.02 == 1, ]

# Drop self-loops (cue identical to response). Rows where the comparison is NA
# are kept.
net$match <- as.numeric((net$cue.eng == net$response.eng) %in% TRUE)
net <- net[net$match != 1, ]
ccp.coocnet.firstorder.02 <- net

# Vertex table: cue words in order of first appearance, followed by response
# words that never occur as a cue. unique(c()) is used rather than union()
# because igraph masks union().
node.labels <- unique(c(net$cue.eng, net$response.eng))
nodes <- data.frame(id = seq_along(node.labels), label = node.labels,
                    stringsAsFactors = FALSE)

# Edge table. Columns are taken by name so numeric columns stay numeric
# (cbind() on mixed vectors coerces everything to character).
edge.cols <- c("cue.eng", "response.eng", "freq", "prop.alt", "missing",
               "source")
el <- net[, edge.cols]

# Attach vertex ids. merge() moves its key column to the front and sorts the
# rows by it, so the id column is renamed by name, not by position. The two
# merges are kept as-is because they determine the edge order given to igraph.
el <- merge(el, nodes, by.x = "cue.eng", by.y = "label",
            all.x = TRUE, all.y = FALSE)
names(el)[names(el) == "id"] <- "from"
el <- merge(el, nodes, by.x = "response.eng", by.y = "label",
            all.x = TRUE, all.y = FALSE)
names(el)[names(el) == "id"] <- "to"

edgelist.firstorder.02 <- as_tibble(el)
edges <- select(edgelist.firstorder.02, from, to, prop.alt, source)
nodes <- as_tibble(nodes)

ccp.igraph <- graph_from_data_frame(d = edges, vertices = nodes,
                                    directed = FALSE)

# Vertex and edge appearance: edges whose source is "wat" are drawn solid and
# darker, all others dashed and lighter; straight, thin edges throughout.
is.wat <- E(ccp.igraph)$source == "wat"
V(ccp.igraph)$color <- "white"
E(ccp.igraph)$lty <- ifelse(is.wat, 1, 2)
E(ccp.igraph)$color <- ifelse(is.wat, "grey50", "grey65")
E(ccp.igraph)$curved <- 0
E(ccp.igraph)$width <- .5

# Define the frame and spacing for the plot
# NOTE(review): par() acts on the device that is current when it is called
# (opening one if none exists); pdf() below opens a new device with default
# settings, so these margins do not reach the PDF. Left in place because
# moving the call after pdf() would change the published figure.
par(mai = c(0, 0, 0, 0))

pdf('./figures/fig-ccp-igraph-firstorder02.pdf', width = 18, height = 18)
set.seed(08544)  # fixes the random start of the force-directed layout
plot(ccp.igraph,
     layout = layout_with_fr,           # Fruchterman-Reingold layout
     vertex.shape = NULL,
     vertex.label.family = "sans",
     # vertex.label.cex used to be passed twice (1.15 and 1.8). igraph looks
     # plot parameters up by name, which returns the first match, so 1.15 is
     # the size that was drawn; the dead duplicate is removed.
     # NOTE(review): if 1.8 was intended, change the value here.
     vertex.label.cex = 1.15,
     vertex.label.dist = 0.5,           # labels offset slightly from nodes
     vertex.frame.color = 'grey50',
     vertex.label.color = 'black',
     vertex.label.font = 1,
     vertex.size = 1,
     vertex.label = V(ccp.igraph)$label
)
dev.off()

# Second Order 02 Threshold ----
# Builds the second-order co-occurrence network (rows with link.02 != 0) and
# draws it to ./figures/fig-ccp-igraph-secondorder02.pdf.

# Seed terms: responses of "first" rows that have link.02 == 1.
secondorder.02.list <- ccp.coocnet$response.eng[
  ccp.coocnet$order == "first" & ccp.coocnet$link.02 == 1
]

# The matching steps in this section used to build R source with paste() and
# run it through try(eval(parse())). A term containing an apostrophe produced
# source that did not parse, and try() swallowed the error, so such terms were
# never matched. They are skipped explicitly below, which keeps the selected
# rows the same; any other failure (e.g. an invalid regular expression) now
# stops the script instead of being hidden. NA terms are ignored.
# NOTE(review): drop the apostrophe filters if those terms should be matched;
# doing so may change the figure.
seed.terms <- secondorder.02.list[!is.na(secondorder.02.list)]
seed.terms <- seed.terms[!grepl("'", seed.terms, fixed = TRUE)]

net <- subset(ccp.coocnet, ccp.coocnet$link.02 != 0)

# TRUE where cue.eng equals a seed term (anchored regular expression).
cue.hit <- rep(FALSE, nrow(net))
for (term in seed.terms) {
  cue.hit <- cue.hit | grepl(paste0("^", term, "$"), net$cue.eng)
}

# Keep every "first" row plus every "second" row whose cue is a seed term.
net$type.secondorder.02 <- as.numeric(
  net$order %in% "first" | (cue.hit & net$order %in% "second")
)
net <- net[net$type.secondorder.02 == 1, ]

# Words already in the network. "let's go" was excluded by name before; the
# apostrophe filter covers it. Previously all labels were pasted into a single
# parse() call, so one unparsable label would have voided the whole step.
known.labels <- unique(c(net$cue.eng, net$response.eng))
known.labels <- known.labels[!is.na(known.labels)]
known.labels <- known.labels[!grepl("'", known.labels, fixed = TRUE)]

# "third" rows are added only when both their cue and their response are
# already in the network.
third <- subset(ccp.coocnet,
                ccp.coocnet$link.02 != 0 & ccp.coocnet$order == "third")
cue.known <- rep(FALSE, nrow(third))
resp.known <- rep(FALSE, nrow(third))
for (label in known.labels) {
  pattern <- paste0("^", label, "$")
  cue.known <- cue.known | grepl(pattern, third$cue.eng)
  resp.known <- resp.known | grepl(pattern, third$response.eng)
}
third <- third[cue.known & resp.known, ]
# Same columns as `net`, so rbind() lines up by name. The earlier version
# created two helper columns and removed them by position (15 and 16), which
# only worked when ccp.coocnet had exactly 14 columns at this point.
third$type.secondorder.02 <- rep(1, nrow(third))
ccp.coocnet.secondorder.02.third <- third

net <- rbind(net, third)

# Drop self-loops (cue identical to response). Rows where the comparison is NA
# are kept.
net$match <- as.numeric((net$cue.eng == net$response.eng) %in% TRUE)
net <- net[net$match != 1, ]
ccp.coocnet.secondorder.02 <- net

# Vertex table: cue words in order of first appearance, followed by response
# words that never occur as a cue. unique(c()) is used rather than union()
# because igraph masks union().
node.labels <- unique(c(net$cue.eng, net$response.eng))
nodes <- data.frame(id = seq_along(node.labels), label = node.labels,
                    stringsAsFactors = FALSE)

# Edge table. Columns are taken by name so numeric columns stay numeric
# (cbind() on mixed vectors coerces everything to character).
edge.cols <- c("cue.eng", "response.eng", "freq", "prop.alt", "missing",
               "source")
el <- net[, edge.cols]

# Attach vertex ids. merge() moves its key column to the front and sorts the
# rows by it, so the id column is renamed by name, not by position. The two
# merges are kept as-is because they determine the edge order given to igraph.
el <- merge(el, nodes, by.x = "cue.eng", by.y = "label",
            all.x = TRUE, all.y = FALSE)
names(el)[names(el) == "id"] <- "from"
el <- merge(el, nodes, by.x = "response.eng", by.y = "label",
            all.x = TRUE, all.y = FALSE)
names(el)[names(el) == "id"] <- "to"

edgelist.secondorder.02 <- as_tibble(el)
edges <- select(edgelist.secondorder.02, from, to, prop.alt, source)
nodes <- as_tibble(nodes)

ccp.igraph <- graph_from_data_frame(d = edges, vertices = nodes,
                                    directed = FALSE)

# Vertex and edge appearance: edges whose source is "wat" are drawn solid and
# darker, all others dashed and lighter; straight, thin edges throughout.
is.wat <- E(ccp.igraph)$source == "wat"
V(ccp.igraph)$color <- "white"
E(ccp.igraph)$lty <- ifelse(is.wat, 1, 2)
E(ccp.igraph)$color <- ifelse(is.wat, "grey50", "grey65")
E(ccp.igraph)$curved <- 0
E(ccp.igraph)$width <- .5

# Define the frame and spacing for the plot
# NOTE(review): par() acts on the device that is current when it is called
# (opening one if none exists); pdf() below opens a new device with default
# settings, so these margins do not reach the PDF. Left in place because
# moving the call after pdf() would change the published figure.
par(mai = c(0, 0, 0, 0))

pdf('./figures/fig-ccp-igraph-secondorder02.pdf', width = 18, height = 18)
set.seed(08544)  # fixes the random start of the force-directed layout
plot(ccp.igraph,
     layout = layout_with_fr,           # Fruchterman-Reingold layout
     vertex.shape = NULL,
     vertex.label.family = "sans",
     # vertex.label.cex used to be passed twice (1.15 and 1.8). igraph looks
     # plot parameters up by name, which returns the first match, so 1.15 is
     # the size that was drawn; the dead duplicate is removed.
     # NOTE(review): if 1.8 was intended, change the value here.
     vertex.label.cex = 1.15,
     vertex.label.dist = 0.5,           # labels offset slightly from nodes
     vertex.frame.color = 'grey50',
     vertex.label.color = 'black',
     vertex.label.font = 1,
     vertex.size = 1,
     vertex.label = V(ccp.igraph)$label
)
dev.off()
